Demonstrate data acquisition, data preparation, data analysis and visualization.
(For required files: https://github.com/RamBarankin/Solar_Market.git).
Find the best place to invest in current patterns according to local climate, current electricity prices, and attitude toward climate change.
library(XML)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(raster)
## Loading required package: sp
##
## Attaching package: 'raster'
## The following object is masked from 'package:dplyr':
##
## select
cities <- tbl_df(shapefile("Solar_Market/US_cities/US_cities.shp"))
cities <- cities %>% plyr::rename(c("ycoord"="LAT")) %>% plyr::rename(c("xcoord"="LONG")) %>%
filter(CLASS=="city") %>% dplyr::select(NAME, ST, LAT, LONG)
This is the resulted dataframe:
## # A tibble: 2,561 × 4
## NAME ST LAT LONG
## <chr> <chr> <dbl> <dbl>
## 1 Bullhead City AZ 35.12057 -114.5461
## 2 Lake Havasu City AZ 34.50291 -114.3136
## 3 San Luis AZ 32.48106 -114.7237
## 4 Somerton AZ 32.60101 -114.7000
## 5 Yuma AZ 32.55333 -114.5155
## 6 Adelanto CA 34.58686 -117.4412
## 7 Agoura Hills CA 34.15363 -118.7601
## 8 Alameda CA 37.77993 -122.2783
## 9 Albany CA 37.88984 -122.3002
## 10 Alhambra CA 34.08398 -118.1355
## # ... with 2,551 more rows
library(RCurl)
## Loading required package: bitops
data <- getURL("https://www.eia.gov/electricity/monthly/epm_table_grapher.cfm?t=epmt_5_6_a", ssl.verifypeer = FALSE)
perftable <- readHTMLTable(data[[1]], stringsAsFactors = T)
elctprice <- perftable[[2]]
elctprice <- tbl_df(elctprice[,c(1,10)])
colnames(elctprice) <- c("State", "Price")
This is the resulted dataframe:
## # A tibble: 62 × 2
## State Price
## <fctr> <fctr>
## 1 New England 15.85
## 2 Connecticut 16.73
## 3 Maine 13.08
## 4 Massachusetts 16.19
## 5 New Hampshire 16.00
## 6 Rhode Island 15.62
## 7 Vermont 14.35
## 8 Middle Atlantic 12.20
## 9 New Jersey 12.91
## 10 New York 13.97
## # ... with 52 more rows
us <- readHTMLTable("http://www.softschools.com/social_studies/state_abbreviations/",
header=T, which=1,stringsAsFactors=F, trim = T, skip.rows = 3)
colnames(us) <- us[3,]
us <- tbl_df(us[4:nrow(us),])
elctprice <- elctprice %>% mutate(State=toupper(State)) %>%
left_join(us) %>% dplyr::select(Abbreviation, Price) %>% plyr::rename(c("Abbreviation"="ST")) %>% na.omit()
## Joining, by = "State"
This is the resulted dataframe:
## # A tibble: 50 × 2
## ST Price
## <chr> <fctr>
## 1 CT 16.73
## 2 ME 13.08
## 3 MA 16.19
## 4 NH 16.00
## 5 RI 15.62
## 6 VT 14.35
## 7 NJ 12.91
## 8 NY 13.97
## 9 PA 10.19
## 10 IL 8.91
## # ... with 40 more rows
cities <- cities %>% left_join(elctprice)
## Joining, by = "ST"
cities$Price <- as.numeric(cities$Price)
cities <- cities %>% na.omit() %>% mutate(idx_SUN = scale(LAT), idx_SUN=idx_SUN+abs(min(idx_SUN)),
idx_PRICE = scale(Price), idx_PRICE=idx_PRICE+abs(min(idx_PRICE)),
Market_Potential_Index = idx_SUN+idx_PRICE)
Market_Potential <- quantile(cities$Market_Potential_Index)
for (i in 1:nrow(cities)){
if (cities$Market_Potential_Index[i]<=Market_Potential[2]){cities$Market_Potential[i]<-"High"} else{
if(cities$Market_Potential_Index[i]>Market_Potential[2]&cities$Market_Potential_Index[i]<=Market_Potential[3]){ cities$Market_Potential[i]<-"Medium"} else{
cities$Market_Potential[i]<-"Low"}}}
## Warning: Unknown column 'Market_Potential'
cities$Market_Potential_Index <- -cities$Market_Potential_Index+9
This is the resulted dataframe:
## # A tibble: 2,560 × 9
## NAME ST LAT LONG Price idx_SUN idx_PRICE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Bullhead City AZ 35.12057 -114.5461 49 2.023794 2.6829421
## 2 Lake Havasu City AZ 34.50291 -114.3136 49 1.905407 2.6829421
## 3 San Luis AZ 32.48106 -114.7237 49 1.517880 2.6829421
## 4 Somerton AZ 32.60101 -114.7000 49 1.540871 2.6829421
## 5 Yuma AZ 32.55333 -114.5155 49 1.531733 2.6829421
## 6 Adelanto CA 34.58686 -117.4412 14 1.921498 0.7266302
## 7 Agoura Hills CA 34.15363 -118.7601 14 1.838461 0.7266302
## 8 Alameda CA 37.77993 -122.2783 14 2.533513 0.7266302
## 9 Albany CA 37.88984 -122.3002 14 2.554579 0.7266302
## 10 Alhambra CA 34.08398 -118.1355 14 1.825110 0.7266302
## # ... with 2,550 more rows, and 2 more variables:
## # Market_Potential_Index <dbl>, Market_Potential <chr>
df <- cities
df <- df%>% plyr::rename(c("Market_Potential_Index"="Solar Market Potential index"))%>%
plyr::rename(c("Market_Potential"="Market Potential Level")) %>%
plyr::rename(c("Price"="Electricity Price (Cents per KW)"))
map <- shapefile("Solar_Market//US_cities/US_cities.shp")
map <- merge(map, df)
map1 <- map[,c(1,2, 54, 57,58)]
map1 <- map1[na.omit(map1),]
library(mapview)
## Warning: package 'mapview' was built under R version 3.3.2
## Loading required package: leaflet
m <- mapview(map1, legend=T, zcol="Solar Market Potential index")
The map:
library(twitteR)
# setup_twitter_oauth (use yourown credentials)
cities <- cities[order(-cities$Market_Potential_Index),]
poten_cities <- cities[1:10,] # The cities with highest potential
# prepare Twitter parameters:
poten_cities$LatLong <- paste0(poten_cities$LAT, ",", poten_cities$LONG,
", ", " 2mi") #location
date = as.character(Sys.Date()) # Today's date
poten_cities$Twitnum <- 0 # the variable to be used to count tweats
for (i in 1:nrow(poten_cities)){
tw = searchTwitter('#climatechange', n = 1e4, since = date, geocode=poten_cities$LatLong[i])
poten_cities$Twitnum[i] <- length(tw)
} #get and store the # of tweats
poten_cities$Potential <- scale(poten_cities$Twitnum) #standardize the # of tweats
poten_cities <- poten_cities[order(-poten_cities$Potential),]#order cities according to tweets
The resulting dataframe:
## # A tibble: 10 × 12
## NAME ST LAT LONG Price idx_SUN idx_PRICE
## <fctr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 South Miami FL 25.70605 -80.29535 1 0.2193168 0
## 2 Coral Gables FL 25.66430 -80.27318 1 0.2113136 0
## 3 Homestead FL 25.46640 -80.44723 1 0.1733832 0
## 4 Florida City FL 25.44182 -80.46852 1 0.1686711 0
## 5 Key West FL 24.56181 -81.76403 1 0.0000000 0
## 6 Miami Springs FL 25.81952 -80.28949 1 0.2410653 0
## 7 Miami Beach FL 25.81535 -80.13088 1 0.2402661 0
## 8 Doral FL 25.80897 -80.35417 1 0.2390423 0
## 9 Miami FL 25.79654 -80.20840 1 0.2366603 0
## 10 Sweetwater FL 25.76496 -80.37184 1 0.2306083 0
## # ... with 5 more variables: Market_Potential_Index <dbl>,
## # Market_Potential <chr>, LatLong <chr>, Twitnum <dbl>, Potential <dbl>
library(plotly)
## Warning: package 'plotly' was built under R version 3.3.2
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.3.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:raster':
##
## select
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
poten_cities$NAME <- factor(poten_cities$NAME, levels = poten_cities[["NAME"]])
p <- plot_ly(poten_cities, x=~poten_cities$NAME ,y=~poten_cities$Potential, type="bar",
marker = list(color = 'rgb(158,202,225)', range ("-1", "-0.5", "Mean", "0.5", "1"),
line = list(color = 'rgb(8,48,107)', width = 1.5))) %>%
layout(title = "Solar Market Potential - Twitter Analysis",
xaxis = list(title = ""),
yaxis = list(title = "#climatechange Tweets Relative to the Mean"))
The graph: